Iowa Liquor Retail Sales¶

This dashboard provides insights into wholesale liquor purchases in Iowa, including trends in sales, revenue, and volumes sold. The dataset utilized is openly accessible on BigQuery, with a detailed description available here.

In [1]:
%%capture
!pip install google-cloud-bigquery
!pip install db-dtypes
!pip install plotly==5.24.1
In [2]:
import os
from IPython.display import display, HTML
from google.cloud import bigquery
import warnings

warnings.filterwarnings("ignore", message="BigQuery Storage module not found")
bigquery_client = bigquery.Client()

Get some metrics¶

In [3]:
query = """
    with
      month_limit as (
        select
          date_sub(
            max(date_trunc(date, month)),
            interval 10 year
          ) as ten_years_ago,
          date_sub(
            max(date_trunc(date, month)),
            interval 12 month
          ) as last_year,
          date_sub(
            max(date_trunc(date, month)),
            interval 1 month
          ) as previous_month,
          max(date_trunc(date, month)) as current_month
        from bigquery-public-data.iowa_liquor_sales.sales
      )
    select
      case date_trunc(sales.date, month)
        when month_limit.current_month then 'current_month'
        when month_limit.previous_month then 'previous_month'
        when month_limit.last_year then 'last_year'
        when month_limit.ten_years_ago then 'ten_years_ago'
      end as month,
      count(*) as number_of_sales,
      cast(sum(volume_sold_liters) as int64) as volume_sold_in_liters,
      cast(sum(sale_dollars) as int64) as revenue,
      count(distinct(store_number)) as number_of_selling_stores
    from bigquery-public-data.iowa_liquor_sales.sales
    inner join month_limit
      on date_trunc(sales.date, month) in (
              month_limit.last_year,
              month_limit.previous_month,
              month_limit.current_month,
              month_limit.ten_years_ago
          )
    group by
      month
"""
metrics_df = bigquery_client.query(query).to_dataframe()
metrics_df.set_index('month', inplace=True)
metrics_df = metrics_df.T
metrics_df.reset_index(inplace=True)
metrics_df = metrics_df.rename(columns={'index': 'metric_name'})
metrics_df['metric_name'] = [' '.join(word.capitalize() for word in k.split('_')) for k in metrics_df['metric_name']]
metrics = metrics_df.to_dict('records')
print(metrics)
[{'metric_name': 'Number Of Sales', 'last_year': 199690, 'previous_month': 176352, 'current_month': 195133, 'ten_years_ago': 179195}, {'metric_name': 'Volume Sold In Liters', 'last_year': 1806156, 'previous_month': 1648290, 'current_month': 1788695, 'ten_years_ago': 1579862}, {'metric_name': 'Revenue', 'last_year': 34164169, 'previous_month': 31308696, 'current_month': 33518799, 'ten_years_ago': 23262370}, {'metric_name': 'Number Of Selling Stores', 'last_year': 1898, 'previous_month': 1927, 'current_month': 1961, 'ten_years_ago': 1217}]

Define the html to display the metrics¶

In [4]:
from IPython.display import display, HTML
import jinja2

def get_metric_card(metric_name, metric_period):
    for metric_dict in metrics:
        if metric_dict['metric_name'] == metric_name:
            break
    metric_dict['current_month_str'] = "{:,.0f}".format(metric_dict['current_month'])
    if metric_dict['metric_name'] == 'Revenue':
        metric_dict['current_month_str'] = '$' + metric_dict['current_month_str']
    for k in ['previous_month', 'last_year', 'ten_years_ago']:
        metric_dict[f'{k}_diff'] = (metric_dict['current_month'] - metric_dict[k])/metric_dict[k]
        metric_dict[f'{k}_diff_str'] = "{:.1%}".format(metric_dict[f'{k}_diff'])
        if metric_dict[f'{k}_diff'] > 0:
            metric_dict[f'{k}_diff_str'] = '+' + metric_dict[f'{k}_diff_str']
        metric_dict[f'{k}_color'] = 'green' if metric_dict[f'{k}_diff'] >=0 else 'red'
    metric_dict['metric_period'] = metric_period
    template_html = jinja2.Template("""
        <div style="display: flex; justify-content: center; align-items: center;">
            <div style="text-align: center; margin: auto;">
                <h3 style="margin: 0">{{ metric_period }}</h3>
                <h1 style="margin: 5px">{{ metric_name }}</h1>
                <h2 style="margin: 10px">{{ current_month_str }}</h2>
                <hr style="margin: 0">
                <span style="margin: 0; color: {{ previous_month_color }};">{{ previous_month_diff_str }}</span>
                <div>vs last month</div>
                <hr style="margin: 0">
                <span style="margin: 0; color: {{ last_year_color }};">{{ last_year_diff_str }}</span>
                <div>vs last year</div>
                <hr style="margin: 0">
                <span style="margin: 0; color: {{ ten_years_ago_color }};">{{ ten_years_ago_diff_str }}</span>
                <div>vs 10 years ago</div>
            </div>
        </div>
    """)
    html = template_html.render(**metric_dict)
    display(HTML(html))

Get the latest month with data¶

In [5]:
query = """
    select
        format_date('%B %Y', max(date_trunc(date, month))) as lattest_month
    from bigquery-public-data.iowa_liquor_sales.sales
"""
metric_period = bigquery_client.query(query).to_dataframe()['lattest_month'][0]
In [6]:
get_metric_card('Volume Sold In Liters', metric_period)

March 2025

Volume Sold In Liters

1,788,695


+8.5%
vs last month

-1.0%
vs last year

+13.2%
vs 10 years ago
In [7]:
get_metric_card('Number Of Sales', metric_period)

March 2025

Number Of Sales

195,133


+10.6%
vs last month

-2.3%
vs last year

+8.9%
vs 10 years ago
In [8]:
get_metric_card('Revenue', metric_period)

March 2025

Revenue

$33,518,799


+7.1%
vs last month

-1.9%
vs last year

+44.1%
vs 10 years ago
In [9]:
get_metric_card('Number Of Selling Stores', metric_period)

March 2025

Number Of Selling Stores

1,961


+1.8%
vs last month

+3.3%
vs last year

+61.1%
vs 10 years ago

Yearly Sales¶

In [10]:
query = """
    with
        year_limit as (
            select
                max(extract(year from date)) as last_year
            from bigquery-public-data.iowa_liquor_sales.sales
        )
    select
        extract(year from date) as year,
        count(*) as nb_of_sales,
        sum(sale_dollars) as revenue
    from bigquery-public-data.iowa_liquor_sales.sales
    where
        extract(year from date)  < (select last_year from year_limit)
    group by
        year
    order by
        year
"""
yearly_sales_df = bigquery_client.query(query).to_dataframe()
In [11]:
import plotly.express as px
from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    px.line(yearly_sales_df, x="year", y="nb_of_sales", title='Yearly Sales').data[0],
    secondary_y=False,
)

revenue_trace = px.line(yearly_sales_df, x="year", y="revenue", title='Yearly Sales').data[0]
revenue_trace.update(line=dict(color="red"))
fig.add_trace(revenue_trace, secondary_y=True)

fig.update_yaxes(title_text="Number of Sales", secondary_y=False)
fig.update_yaxes(title_text="Revenue", secondary_y=True)

fig.update_layout(
    title={
        'text': '<b>Yearly Sales with Revenue</b>',
        'x':0.5,
        'font': {'size': 25, 'family': 'Arial'} 
    },
    margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)

fig.show()

Monthly Sales¶

In [12]:
query = """
    with
      month_limit as (
        select
          date_sub(
            max(date_trunc(date, month)),
            interval 11 month
          ) as start_month,
        from bigquery-public-data.iowa_liquor_sales.sales
      )
    select
      date_trunc(sales.date, month) as month,
      count(*) as nb_of_sales,
      sum(sale_dollars) as revenue
    from bigquery-public-data.iowa_liquor_sales.sales
    inner join month_limit
      on date_trunc(sales.date, month) >= month_limit.start_month
    group by
        month
    order by
        month
"""
monthly_sales_df = bigquery_client.query(query).to_dataframe()
In [13]:
import plotly.express as px
from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    px.line(monthly_sales_df, x="month", y="nb_of_sales", title='Monthly Sales').data[0],
    secondary_y=False,
)

revenue_trace = px.line(monthly_sales_df, x="month", y="revenue", title='Monthly Revenue').data[0]
revenue_trace.update(line=dict(color="red"))  # Set line color to red
fig.add_trace(revenue_trace, secondary_y=True)

fig.update_yaxes(title_text="Number of Sales", secondary_y=False)
fig.update_yaxes(title_text="Revenue", secondary_y=True)

fig.update_layout(
    title={
        'text': '<b>Monthly Sales with Revenue</b>',
        'x':0.5,
        'font': {'size': 25, 'family': 'Arial'} 
    },
    margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)

fig.show()

Volume Sold In Liters Per County¶

In [14]:
import plotly.express as px
import json

query = """
    with
      month_limit as (
        select
          max(date_trunc(date, month)) as last_month
        from bigquery-public-data.iowa_liquor_sales.sales
      )
    select
      county_fips_code,
      county_name,
      sum(volume_sold_liters) as volume_sold_liters
    from bigquery-public-data.iowa_liquor_sales.sales
    inner join month_limit
      on month_limit.last_month = date_trunc(sales.date, month)
    inner join bigquery-public-data.geo_us_boundaries.counties
      on st_contains(counties.county_geom, sales.store_location)
    group by
      county_fips_code,
      county_name
"""
county_volume_df = bigquery_client.query(query).to_dataframe()

with open("./geojsons/geojson-counties-fips.json") as f:
    counties = json.load(f)

target_states = ['19']
counties['features'] = [f for f in counties['features'] if f['properties']['STATE'] in target_states]


fig = px.choropleth(county_volume_df, geojson=counties, locations='county_fips_code', color='volume_sold_liters',
                    color_continuous_scale='Viridis_r',  # Reversed color scale
                    range_color=(min(county_volume_df['volume_sold_liters']), max(county_volume_df['volume_sold_liters'])),
                    scope='usa',
                    hover_data=['county_name', 'county_fips_code', 'volume_sold_liters'],  # Specify additional data for tooltip
                    labels={'volume_sold_liters': 'Volume Sold Liters', 'county_name': 'County', 'county_fips_code': 'FIPS'}  # Corrected labels
                    )
fig.update_layout(margin={'r': 50, 't': 30, 'l': 50, 'b': 50})

fig.update_geos(
    center=dict(lon=-93.5, lat=41.9),  # Coordinates for Iowa
    projection_scale=5
)


fig.update_layout(
    title={
        'text': f'<b>Volume Sold In Liters Per County ({metric_period})</b>',
        'x':0.5,
        'font': {'size': 25, 'family': 'Arial'} 
    },
    margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)

fig.show()

Types of drink sold¶

In [15]:
import plotly.graph_objects as go

query = """
    with
      month_limit as (
        select
          max(date_trunc(date, month)) as last_month
        from bigquery-public-data.iowa_liquor_sales.sales
      ),
      last_month_sales_with_types as (
        select
          sales.*,
          case
            when lower(category_name) like '%whiskies%' then 'Whiskey'
            when lower(category_name) like '%whiskey%' then 'Whiskey'
            when lower(category_name) like '%rum%' then 'Rum'
            when lower(category_name) like '%vodka%' then 'Vodka'
            when lower(category_name) like '%tequila%' then 'Tequila'
            when lower(category_name) like '%gin%' then 'Gin'
            when lower(category_name) like '%shnapps' then 'Shnapps'
            when lower(category_name) like '%shnapps' then 'Shnapps'
            when lower(category_name) like '%brandies%' then 'Brandy'
            when lower(category_name) like '%brandy%' then 'Brandy'
            else 'Others'
          end as drink_type
        from bigquery-public-data.iowa_liquor_sales.sales
        inner join month_limit
          on date_trunc(sales.date, month) = month_limit.last_month
      )
    select
        drink_type,
        sum(volume_sold_liters) as volume_sold_liters
    from last_month_sales_with_types
    group by
        drink_type
"""
last_month_drink_types_df = bigquery_client.query(query).to_dataframe()
In [16]:
import plotly.graph_objects as go


fig = go.Figure(data=[go.Pie(labels=last_month_drink_types_df['drink_type'],
                             values=last_month_drink_types_df['volume_sold_liters'],
                             textinfo='label+percent',
                             insidetextorientation='radial',
                             showlegend=False
                            )])


fig.update_layout(
    title={
        'text': f'<b>Types of Drinks Sold ({metric_period})</b>',
        'x':0.5,
        'font': {'size': 25, 'family': 'Arial'} 
    },
    xaxis_title='Month',
    margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)

fig.show()

Top Vendors¶

In [17]:
query = """
    with
      month_limit as (
        select
          date_sub(
            max(date_trunc(date, month)),
            interval 11 month
          ) as first_month,
          max(date_trunc(date, month)) as last_month
        from bigquery-public-data.iowa_liquor_sales.sales
      ),
      top_vendors as (
        select
          vendor_name,
          sum(sale_dollars) as revenue
        from bigquery-public-data.iowa_liquor_sales.sales
        inner join month_limit
          on date_trunc(sales.date, month) = month_limit.last_month
        group by vendor_name
        order by revenue desc
        limit 3
      )
    select
      sales.vendor_name,
      date_trunc(sales.date, month) as month,
      sum(sale_dollars) as revenue
    from bigquery-public-data.iowa_liquor_sales.sales
    inner join top_vendors
      on top_vendors.vendor_name = sales.vendor_name
    inner join month_limit
      on month_limit.first_month <= date_trunc(sales.date, month)
    group by
      sales.vendor_name,
      month
    order by
      month,
      vendor_name
"""
top_vendors_df = bigquery_client.query(query).to_dataframe()
In [18]:
import plotly.graph_objects as go
import numpy as np

top_vendors = top_vendors_df['vendor_name'].unique()
x = np.array(top_vendors_df['month'].unique())

title = 'Main Source for News'
labels = top_vendors
colors = ['rgb(67,67,67)', 'rgb(115,115,115)', 'rgb(49,130,189)']

mode_size = [8, 8, 12]
line_size = [2, 2, 4]



fig = go.Figure()

for i in range(0, len(top_vendors)):
    current_vendor = top_vendors[i]
    y = top_vendors_df[top_vendors_df['vendor_name'] == current_vendor]['revenue']
    
    fig.add_trace(go.Scatter(x=x, y=y, mode='lines',
        name=labels[i],
        line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
    ))

fig.update_layout(
    title={
        'text': '<b>Top Vendors</b>',
        'x':0.5,
        'font': {'size': 25, 'family': 'Arial'} 
    },
    margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)

fig.show()

Daily Volume Sold Distribution¶

In [19]:
query = """
    with
      month_limit as (
        select
          date_sub(
            max(date_trunc(date, month)),
            interval 11 month
          ) as first_month
        from bigquery-public-data.iowa_liquor_sales.sales
      )
    select
      date,
      sum(volume_sold_liters) as volume_sold_liters
    from bigquery-public-data.iowa_liquor_sales.sales
    inner join month_limit
      on month_limit.first_month <= date_trunc(sales.date, month)
    group by
      date
"""
daily_liters_df = bigquery_client.query(query).to_dataframe()
daily_liters_df
Out[19]:
date volume_sold_liters
0 2024-06-05 107043.56
1 2024-12-06 72876.35
2 2024-05-22 72490.15
3 2024-09-11 102101.64
4 2025-01-23 71297.62
... ... ...
311 2024-11-14 92992.41
312 2024-08-15 74538.94
313 2025-02-05 114857.48
314 2025-01-15 90178.53
315 2024-12-08 13417.82

316 rows × 2 columns

In [20]:
import plotly.express as px
fig = px.histogram(daily_liters_df, x="volume_sold_liters", 
                   marginal="box",
                   hover_data=daily_liters_df.columns)

fig.update_layout(
    title={
        'text': '<b>Daily Volume Sold Distribution For The Past 12 Months</b>',
        'x':0.5,
        'font': {'size': 25, 'family': 'Arial'} 
    },
    xaxis_title='Volume Sold in Liters',
    yaxis_title='Count of Days',
    margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)

fig.show()
In [21]:
from datetime import datetime

utc_timestamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

print(f"Updated at: {utc_timestamp}")
Updated at: 2025-04-30 02:39:03